Single-cell RNA-seq
Before applying normalization and AWST, we filtered out cells that did not met the following criteria:
remove the genes detected in no cells.
filter out cells not having at least 500 observed features.
filter out cells not having at least 20 different intensities across the features.
if(!file.exists("Level3.RData")) {
ddata <- read_csv("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE100866&format=file&file=GSE100866_PBMC_vs_flow_10X-RNA_umi.csv.gz")
#restrict the features to the HUMAN ones
ddata <- ddata[grep("^HUMAN_", ddata$X1),]
ddata <- as.data.frame(ddata)
rownames(ddata) <- gsub("HUMAN_", "", ddata$X1)
ddata <- as.matrix(ddata[,-1])
save(ddata, file = "Level3.RData")
} else {
load("Level3.RData")
}
Apply AWST
if(!file.exists("expression.RData")) {
ddata <- t(ddata)
###
no_of_detected_gene_per_sample <- rowSums(ddata > 0)
fivenum(no_of_detected_gene_per_sample)# 10 638 739 873 4833
# restrict the collection of cells to those cells having at least 500 observed features
sum(no_of_detected_gene_per_sample > 500) # 7613
dim(ddata <- ddata[no_of_detected_gene_per_sample > 500,])#[1] 7613 17014
###
no_of_different_intensities <- apply(ddata, 1, function(x) length(table(x)))
fivenum(no_of_different_intensities)# 9 17 20 24 105
# restrict the collection of cells to those cells having at least 20 different intensities across the featutures
dim(ddata <- ddata[which(no_of_different_intensities >= 20), ])#[1] 4123 17014
###
# apply the full quantile normalization
normCounts <- EDASeq::betweenLaneNormalization(t(ddata), which = "full", round = FALSE)
save(normCounts, file = "normCounts.RData")
###
# apply the AWS-transformation
library(awst)
dim(exprData <- awst(normCounts, poscount = TRUE, full_quantile = TRUE)) #[1] 4123 17014
dim(exprData <- gene_filter(exprData, nBins = 30, heterogeneity_threshold = 0.05)) #[1] 4123 330
save(exprData, file = "expression.RData")
} else {
load("expression.RData")
}
Clustering and dimensionality reduction
if(!file.exists("expression_umap_2d.RData")) {
nrow_exprData <- nrow(exprData)
ncol_exprData <- ncol(exprData)
ddist <- dist(exprData)
save(ddist, nrow_exprData, ncol_exprData, file = "expression_dist.RData")
hhc <- hclust(ddist, method = "ward.D2")
save(hhc, nrow_exprData, ncol_exprData, file = "expression_dist_hclust.RData")
pprcomp <- prcomp(exprData)
pprcomp$x <- pprcomp$x[, 1:10]
pprcomp$rotation <- pprcomp$rotation[, 1:10]
save(pprcomp, file = "expression_prcomp.RData")
set.seed(2019) # needed to get the figure in the paper
ans_Rtsne <- Rtsne(exprData, pca = FALSE, perplexity = 250) # Run TSNE
save(ans_Rtsne, file = "expression_Rtsne_2d.RData")
set.seed(2019) # needed to get the figure in the paper
ans_umap <- umap(exprData)
save(ans_umap, file = "expression_umap_2d.RData")
}
load("annotation.RData")
load("expression_dist_hclust.RData")
annotation.df <- annotation.df[hhc$labels,]
###############
save_plots <- FALSE
png_width_large <- 1500
png_height_large <- 750
png_width_small <- width_png <- 700
png_height_small <- 700
png_res <- 1/300
###################
color.bar2 <- function(x_pos, y_pos, lut, min, max=-min, nticks=11, ticks=seq(min, max, len=nticks), title='', values = NULL) {
scale = (max-min)/length(lut)*0.3
for (i in 1:length(lut)) {
y_low <- (i-1)*scale + min + y_pos
y_high <- y_low + scale
rect(x_pos,y_low,x_pos+.05,y_high, col=lut[i], border=NA)
text(x_pos+.05, (y_low + y_high)/2, values[i], adj = -0.1)
}
}
vvalues <- c("-3.0", "-2.0", "-1.3", "-0.6", " 0.0", " 0.6", " 1.3", " 2.0", " 3.0")
ffill2 <- names(table(annotation.df$CD3.col))
Main Clustering
clustering.prefix <- "CBMC"; short.prefix <- "CBMC"
clustering.df <- data.frame(cell = annotation.df$cell)
rownames(clustering.df) <- clustering.df$cell
############
mmain <- paste0("CBMC study (", nrow_exprData, " cells/", ncol_exprData, " genes)")
if(save_plots) {
mmain <- ""; png("main_dendrogram.png", width= png_width_large, height= png_height_large, res = png_res)
}
hhc$height <- hhc$height/max(hhc$height)
plot(hhc, hang = -1, labels = FALSE, xlab = "", sub = "", main = mmain)
###
wwhere <- 10
hh <- mean(c(hhc$height[length(hhc$height)-wwhere+2], hhc$height[length(hhc$height)-wwhere+1]))
tmp <- tmp_ <- as.factor(cutree(hhc, k = wwhere))
levels(tmp) <- c( "1", "1", "3", "4", "5", "6", "6", "8","4", "5")
wwhere <- length(unique(levels(tmp)))
clusteringWhere <- paste0(clustering.prefix, wwhere)
clusteringWhere.col <- paste0(clusteringWhere, ".col")
assign(clusteringWhere.col, tmp)
if(wwhere < 10) levels(tmp) <- paste0(short.prefix, wwhere, 1:wwhere) else levels(tmp) <- paste0(short.prefix, wwhere, c(paste(1:9), letters[1:(wwhere-9)]))
assign(clusteringWhere, tmp)
levels(tmp) <- c("black", "red", "green3", "blue", "cyan", "magenta")
assign(clusteringWhere.col, tmp)
tt <- table(get(clusteringWhere), get(clusteringWhere.col))
colorCode <- colnames(tt)[apply(tt, 1, which.max)]
names(colorCode) <- rownames(tt)
assign(paste0(clusteringWhere, ".colorCode"), colorCode)
clust.colorCode <- colorCode
clustering.df$tmp <- get(clusteringWhere)
clustering.df$tmp.explanatory <- clustering.df$tmp
clustering.df$tmp.col <- get(clusteringWhere.col)
ncol_ <- ncol(clustering.df)
colnames(clustering.df)[(ncol_-2):ncol_] <- c(clusteringWhere, paste0(clusteringWhere, ".explanatory"), clusteringWhere.col)
levels(clustering.df[, paste0(clusteringWhere, ".explanatory")]) <- c("CBMC1 - T Cell", "CBMC2 - B Cell", "CBMC3 - unclear", "CBMC4 - Monocyte", "CBMC5 - myeloid DC", "CBMC6 - plasmacytoid DC")
annotation.col <- annotation.df[, grep(".col", colnames(annotation.df))]
colnames(annotation.col) <- gsub(".col", "", colnames(annotation.col))
annotation.col <- annotation.col[, rev(c( "CD19", "CD3", "CD11c", "CD14","CD4","CD8", "CD2","CD57"))]
annotation.col$CBMC <- clustering.df[, ncol(clustering.df)]
colored_bars(colors = annotation.col, dend = as.dendrogram(hhc), y_scale = 0.17, y_shift = 0.015)
save(clustering.df, clust.colorCode, file = "clustering.RData")
tt <- table(clustering.df[, ncol(clustering.df)-1])
pct <- paste0(round(100*tt/sum(tt), 1), "%")
llegend <- paste(names(tt), " (", tt, "; ", pct, ")", sep = "")
tt <- table(clustering.df[, ncol(clustering.df)-1], clustering.df[, ncol(clustering.df)])
ffill <- colnames(tt)[apply(tt, 1, which.max)]
legend(700, .99, legend=llegend, fill = ffill, y.intersp = 1, box.col = "white", border = "white", title = "CBMC", title.adj = 0)
color.bar3 <- function(x_pos, y_pos, lut, min, max=-min, nticks=11, ticks=seq(min, max, len=nticks), title='', values = NULL) {
scale = (max-min)/length(lut)*0.3
for (i in 1:length(lut)) {
y_low <- (i-1)*scale + y_pos
y_high <- y_low + scale
rect(x_pos,y_low,x_pos+90,y_high, col=lut[i], border=NA)
text(x_pos+.05, (y_low + y_high)/2, values[i], adj = -2)
}
}
color.bar3(4000, 0.65, ffill2, -0.6, values = vvalues)
text(4045, 0.63, "Marker's level")
text(1, 1, "(a)")

PCA (AWST) | ADT
x_legend <- -0.2; y_legend <- 2.63
x_text <- -2.3; y_text <- 2.
load("expression_prcomp.RData")
pprcomp$x <- scale(pprcomp$x)
mmain = paste0("Principal components analysis (", nrow_exprData, " cells/", ncol_exprData, " features)")
if(save_plots) png("prcomp_CITE6.png", width= png_width_small, height= png_height_small, res = png_res)
plot(pprcomp$x, col = clustering.df$CBMC6.col, main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
legend(x_legend, y_legend, legend=llegend, fill = ffill, y.intersp = 1, box.col = "white", border = "white", title = "CBMC", title.adj = 0)
text(x_text, y_text, "(b)")

###############################
mmain <- "principal component analysis - CD3"# "prcomp (AWST)| flow cytometry/CD3"
cat(sprintf("\n\n### %s\n\n", mmain))
principal component analysis - CD3
if(save_plots) png(file = "prcomp_CD3.png", width = width_png, height = width_png)
plot(pprcomp$x, col = as.character(annotation.df$CD3.col), main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
ffill2 <- names(table(annotation.df$CD3.col))
color.bar2(-2.3, -0.7, ffill2, -3, values = vvalues)
text(x_text, y_text, "(a)")

mmain <- "principal component analysis - CD19"# "prcomp (AWST)| flow cytometry/CD19"
cat(sprintf("\n\n### %s\n\n", mmain))
principal component analysis - CD19
if(save_plots) png(file = "prcomp_CD19.png", width = width_png, height = width_png)
plot(pprcomp$x, col = as.character(annotation.df$CD19.col), main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
color.bar2(-2.3, -0.7, ffill2, -3, values = vvalues)
text(x_text, y_text, "(b)")

mmain <- "principal component analysis - CD11c"# "prcomp (AWST)| flow cytometry/CD11c"
cat(sprintf("\n\n### %s\n\n", mmain))
principal component analysis - CD11c
if(save_plots) png(file = "prcomp_CD11c.png", width = width_png, height = width_png)
plot(pprcomp$x, col = as.character(annotation.df$CD11c.col), main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
color.bar2(-2.3, -0.7, ffill2, -3, values = vvalues)
text(x_text, y_text, "(c)")

mmain <- "principal component analysis - CD14"# "prcomp (AWST)| flow cytometry/CD14"
cat(sprintf("\n\n### %s\n\n", mmain))
principal component analysis - CD14
if(save_plots) png(file = "prcomp_CD14.png", width = width_png, height = width_png)
plot(pprcomp$x, col = as.character(annotation.df$CD14.col), main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
color.bar2(-2.3, -0.7, ffill2, -3, values = vvalues)
text(x_text, y_text, "(d)")

mmain <- "principal component analysis - CD4"# "prcomp (AWST)| flow cytometry/CD4"
cat(sprintf("\n\n### %s\n\n", mmain))
principal component analysis - CD4
if(save_plots) png(file = "prcomp_CD4.png", width = width_png, height = width_png)
plot(pprcomp$x, col = as.character(annotation.df$CD4.col), main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
color.bar2(-2.3, -0.7, ffill2, -3, values = vvalues)
text(x_text, y_text, "(e)")

mmain <- "principal component analysis - CD8"# "prcomp (AWST)| flow cytometry/CD8"
cat(sprintf("\n\n### %s\n\n", mmain))
principal component analysis - CD8
if(save_plots) png(file = "prcomp_CD8.png", width = width_png, height = width_png)
plot(pprcomp$x, col = as.character(annotation.df$CD8.col), main = mmain,
xlab = "first principal component", ylab = "secondo principal component", pch = 19)
color.bar2(-2.3, -0.7, ffill2, -3, values = vvalues)
text(x_text, y_text, "(f)")
